* Clean up: start from an empty session before loading the party-level data

clear all

* Raise the variable limit: the bandwidth loops further down create several
* variables for each of the 91 bandwidths (10 to 100 percent)
set maxvar 10000

/*
* Set working directory: please set your own
cd "/Users/vicentevalentim/Desktop/fragmentation_replication_bjps"
*/

* Open the party-level dataset (path is relative to the working directory)
use Data/fragmentation_partylevel.dta, clear

* Margin between a party's vote share and the electoral threshold:
* performance >= 0 means the party is at or above the threshold.
* The version of performance shipped with the dataset is discarded and rebuilt.
drop performance
gen performance = pervote - threshold

/* Bandwidths around the electoral threshold, for every percentage Y from 10 to 100.
   bwY        = threshold plus Y percent of the threshold (upper bound)
   inbwY      = 1 if the party is at/above the threshold and at most at bwY
   bwY_below  = threshold minus Y percent of the threshold (lower bound)
   inbwY_below= 1 if the party is below the threshold but at least at bwY_below */

forvalues y = 10(1)100 {
    gen bw`y' = threshold + (`y' * 0.01 * threshold)
    gen inbw`y' = 0
    * Stata treats missing as larger than any number, so without the explicit
    * !missing() guard a party with a missing threshold (hence missing bw and
    * missing performance) would satisfy both inequalities and be flagged.
    replace inbw`y' = 1 if pervote <= bw`y' & performance >= 0 & !missing(performance)
}


forvalues y = 10(1)100 {

    gen bw`y'_below = threshold - (`y' * 0.01 * threshold)
    gen inbw`y'_below = 0
    * No extra guard needed here: a missing lower bound or a missing vote share
    * already makes one of the inequalities false.
    replace inbw`y'_below = 1 if pervote >= bw`y'_below & pervote < threshold & performance < 0
}


** Election-level aggregates, adapted (with small changes) from the election-level
** do-file by Nicolas Merz
* do-file can be found at http://manifesto-project.wzb.eu/datasets/mpelds

** Parties per election (one row per party, so the group size is the count)
by country date, sort: gen cnt_parties = _N
summarize cnt_parties

* Vote share summed over all parties in the election
by country date, sort: egen sum_pervote = total(pervote)
summarize sum_pervote

* Vote share summed over parties with treat == 1 only (added to the original do file);
* the result is left missing on rows where treat is not 1
by country date, sort: egen sum_pervote_parl = total(pervote) if treat == 1

* Seats summed over the parties in the data; the pre-existing sum_seats is rebuilt
drop sum_seats
by country date, sort: egen sum_seats = total(absseat)
gen total_seats = totseats

** Parties holding at least one seat
gen in_parl = (absseat > 0 & absseat != .)
by country date, sort: egen cnt_parl_parties = total(in_parl)

** Parties holding zero seats
gen out_of_parl = (absseat == 0 & absseat != .)
by country date, sort: egen cnt_nonparl_parties = total(out_of_parl)

*** Effective number of parliamentary parties: 1 / sum of squared seat shares
* (the seatshare variable shipped with the data is replaced)
drop seatshare
gen seatshare = absseat / sum_seats
gen seatshare2 = seatshare^2
by country date, sort: egen total_seatshare2 = total(seatshare2)
gen enpp = 1/total_seatshare2

*** Effective number of electoral parties: 1 / sum of squared vote shares
gen pervotesq = (pervote/sum_pervote)^2
by country date, sort: egen total_pervotesq = total(pervotesq)
gen enep = 1/total_pervotesq

*** Same, with shares taken relative to the vote total of treat == 1 parties
*** (added to the original do file)
gen pervotesq_parl = (pervote/sum_pervote_parl)^2
by country date, sort: egen total_pervotesq_parl = total(pervotesq_parl)
gen enep_parl = 1/total_pervotesq_parl

*** rile_mean: unweighted mean left-right (rile) score of all parties in the election
bys country date: egen rile_mean = mean(rile)

*** rile_mean *of parties in parliament* (added to the original do file)
* Only rows with treat == 1 enter the mean and only those rows receive a value
bys country date: egen rile_mean_parl = mean(rile) if treat == 1

*** ideological center of gravity (Sigelmann & ...)
* Vote-share-weighted mean of rile: each party's rile times its share of sum_pervote
gen weighted_rile = rile * pervote/sum_pervote
bys country date: egen rile_wmean = total(weighted_rile)

*** ideological center of gravity *of parties in parliament* (added to the original do file)
* Weights are relative to sum_pervote_parl; values exist only on treat == 1 rows
gen weighted_rile_parl = rile * pervote/sum_pervote_parl if treat == 1
bys country date: egen rile_wmean_parl = total(weighted_rile_parl) if treat == 1

*** polarization index (Dalton 2008)
* Squared distance from the center of gravity (rile rescaled by 100), weighted by
* the raw vote share pervote, summed per election, then square-rooted
gen distances = (((rile - rile_wmean) / 100) ^ 2) * pervote
bys country date: egen total_distances = total(distances)
gen rile_polarization = sqrt(total_distances)

*** polarization index (Dalton 2008) *for parties in parliament* (added to the original do file)
* rile_wmean_parl is missing outside treat == 1, so distances_parl is missing there too
gen distances_parl = (((rile - rile_wmean_parl) / 100) ^ 2) * pervote
bys country date: egen total_distances_parl = total(distances_parl) if treat == 1
gen rile_polarization_parl = sqrt(total_distances_parl)
* replacing 0's with missings
* NOTE(review): presumably a zero means no usable rile data for the election - confirm
replace rile_polarization_parl = . if rile_polarization_parl == 0
replace rile_polarization = . if rile_polarization == 0

* Variable labels for the party-level file
label variable country "country"
label variable countryname "English name of the country"
label variable rile_polarization "ideological polarization (Dalton 2008)"
label variable cnt_parties "number of parties covered for the election"
label variable cnt_parl_parties "number of parliamentary parties coded in the Manifesto Dataset"
label variable sum_pervote "sum of vote share of parties covered for the election"
label variable sum_seats "number of seats occupied by parties that are covered by the Manifesto dataset"
label variable total_seats "total number of seats in the parliament"
label variable date "election date"

/* ENPP contributed by the parties just above the threshold:
   recompute ENPP leaving out the parties inside each bandwidth and take the
   difference with the full ENPP */

forvalues pct = 10/100 {
    by country date, sort: egen sum_seats_wo`pct' = total(absseat) if inbw`pct' == 0 & treat > 0
    gen seatshare_wo`pct' = absseat / sum_seats_wo`pct' if inbw`pct' == 0 & treat > 0
    gen seatshare2_wo`pct' = seatshare_wo`pct'^2
    by country date, sort: egen total_seatshare2_wo`pct' = total(seatshare2_wo`pct')
    gen enpp_wo`pct' = 1/(total_seatshare2_wo`pct')
    gen enppadd_bw`pct' = enpp - enpp_wo`pct'
}

/* ENEP (parliamentary parties) contributed by the parties just above the
   threshold, built the same way from vote shares */

forvalues pct = 10/100 {
    by country date, sort: egen sum_pervote_parl_wo`pct' = total(pervote) if inbw`pct' == 0 & treat > 0
    gen pervotesq_parl_wo`pct' = (pervote/sum_pervote_parl_wo`pct')^2
    by country date, sort: egen total_pervotesq_parl_wo`pct' = total(pervotesq_parl_wo`pct')
    gen enep_parl_wo`pct' = 1/(total_pervotesq_parl_wo`pct')
    gen enepparladd_bw`pct' = enep_parl - enep_parl_wo`pct'
}


/* Polarization contributed by the parties just above the threshold */

forvalues pct = 10/100 {

    * Unweighted mean rile without the parties inside the bandwidth
    by country date, sort: egen rile_mean_wo`pct' = mean(rile) if inbw`pct' == 0 & treat == 1

    * Center of gravity (Sigelmann & ...) without the parties inside the bandwidth
    by country date, sort: egen rile_wmean_wo`pct' = total(weighted_rile_parl) if inbw`pct' == 0 & treat == 1

    * Polarization without the parties inside the bandwidth
    by country date, sort: egen total_distances_wo`pct' = total(distances_parl) if inbw`pct' == 0 & treat > 0
    gen rile_polarization_wo`pct' = sqrt(total_distances_wo`pct')

    * Full parliamentary polarization minus polarization without those parties
    gen poladd_bw`pct' = rile_polarization_parl - rile_polarization_wo`pct'
}

* Dummies for relevant parties: relN is 1 when the vote share exceeds N percent
* (N = 2, 4, 5, 10), 0 otherwise
foreach cutoff in 2 4 5 10 {
    gen rel`cutoff' = (pervote > `cutoff')
}

** Adding dummy for EU members and applicants
* eumember codes 10 and 20 become 1, code 0 stays 0; other codes are copied unchanged
recode eumember 0 = 0 10 20 = 1, gen(eu)

** Adding data on Polity IV scores
* Placeholder so the variable exists in this file; rows matched in the merge
* below take the value from the Polity file, which is the master dataset there
gen polity2 = .

* Fill in a missing year from the leading digits of date
* (assumes date is coded YYYYMM - TODO confirm)
gen year2 = int(date/100)
replace year = year2 if year == .

* Build the merge key: country name followed by the year as text
tostring year, replace
gen countryyear = countryname+year
destring year, replace

save Data/fragpol_cmp, replace

* Load the Polity sheet and build the same key from its own country and year
import excel using Data/p4v2017.xls, sheet("p4v2017") firstrow clear

tostring year, replace
gen countryyear = country+year
destring year, replace

* Drop the Polity country variable so the one from the party-level file is brought in
drop country

* NOTE(review): m:m pairs observations by position within each key. This only
* behaves like a 1:m merge if countryyear is unique in the Polity file - confirm
merge m:m countryyear using Data/fragpol_cmp, gen(merge2)

* Discard Polity country-years that have no party-level observations
drop if merge2 == 1

drop countryyear

* Rebuild country as a numeric variable encoded from countryname
drop country
encode countryname, gen(country)
* Instruments based on the parties closest to the electoral threshold
* First generate the absolute distance between vote share and threshold
gen perf_absolute = abs(performance)
sum perf_absolute


* Rank parties within each election by that distance (1 = closest).
* stable keeps tied parties in their current order, so the ranking does not
* change from one run to the next.
sort cntrydate perf_absolute, stable
by cntrydate: gen perf_absolute_rank = _n

* Build the variables for the first, second, third and fourth closest party
forvalues x = 1/4 {
    * Indicator for the x-th closest party
    gen closest_party_`x' = 0
    replace closest_party_`x' = 1 if perf_absolute_rank == `x'

    * Signed margin of that party (missing on all other rows)
    gen perf_closest_party_`x' = performance if closest_party_`x' == 1

    * Same margin, expressed as a fraction of the threshold
    gen perf_closest_party_`x'_norm = performance / threshold if closest_party_`x' == 1

    * Polynomials of the raw margin
    gen perf_closest_party_`x'_sq = perf_closest_party_`x' ^ 2
    gen perf_closest_party_`x'_3 = perf_closest_party_`x' ^ 3
    gen perf_closest_party_`x'_4 = perf_closest_party_`x' ^ 4

    * Polynomials of the normalized margin.
    * Fix: these were previously computed from the raw margin, which made them
    * exact copies of the three variables above.
    gen perf_closest_party_`x'_norm_sq = perf_closest_party_`x'_norm ^ 2
    gen perf_closest_party_`x'_norm_3 = perf_closest_party_`x'_norm ^ 3
    gen perf_closest_party_`x'_norm_4 = perf_closest_party_`x'_norm ^ 4

    * Whether the x-th closest party is at/above the threshold (1) or below it (0).
    * Fix: the 0 is now written only on the row of the x-th closest party, and a
    * party with a missing margin is left missing instead of being coded as 1.
    * Before, every below-threshold party received a 0, so an election without
    * an x-th party still collapsed to 0 rather than missing.
    gen treated_closest_party_`x' = .
    replace treated_closest_party_`x' = 1 if perf_absolute_rank == `x' & performance >= 0 & !missing(performance)
    replace treated_closest_party_`x' = 0 if perf_absolute_rank == `x' & performance < 0

}

save Data/cmp_beforecollapse, replace

* Collapsing to one row per election (cntrydate)

* Build the three bandwidth variable lists (10 to 100 percent) instead of typing
* the 273 names out by hand; the resulting lists are in ascending bandwidth order
local enppadd_all
local inbw_above_all
local inbw_below_all
forvalues pct = 10/100 {
    local enppadd_all `enppadd_all' enppadd_bw`pct'
    local inbw_above_all `inbw_above_all' inbw`pct'
    local inbw_below_all `inbw_below_all' inbw`pct'_below
}

* mean: election-level quantities that are constant (or averaged) across parties
* max : picks the single non-missing value of the closest-party variables
* sum : number of parties inside each bandwidth
collapse (mean) year eu threshold date enep enpp cnt_parl_parties ///
        rile_polarization rile_polarization_parl country ///
        cnt_parties sum_seats poladd_bw20 poladd_bw30 poladd_bw40 poladd_bw50 ///
        polity2 enep_parl sum_pervote ///
        `enppadd_all' sum_pervote_parl ///
    (max) treated_closest_party_* perf_closest_party_* cnt_nonparl_parties ///
    (sum) `inbw_above_all' `inbw_below_all', by(cntrydate)

* Logged versions of the 20/30/40/50 percent bandwidth measures
/* Note: many elections have a value of 0, which cannot be logged,
so 1 is added to every value before taking the log.
*/
foreach stem in inbw enppadd_bw poladd_bw {
    foreach pct in 20 30 40 50 {
        gen `stem'`pct'_log = log(`stem'`pct'+1)
    }
}

* Square and cube of year
forvalues power = 2/3 {
    gen year`power' = year^`power'
}

* Values from the previous election(s) in the same country
sort country date

* Parties inside each lower bandwidth at the previous election
forvalues pct = 10/100 {
    by country: gen inbw`pct'_below_tm1 = inbw`pct'_below[_n-1]
}

* Party counts at the previous election (the lag names differ from the source names)
by country: gen partiest_tm1 = cnt_parties[_n-1]
by country: gen partiesp_tm1 = cnt_parl_parties[_n-1]

* One-election lags that simply append _tm1 to the source name
foreach src in enep enep_parl rile_polarization rile_polarization_parl ///
        enppadd_bw20 enppadd_bw30 enppadd_bw40 enppadd_bw50 ///
        poladd_bw20 poladd_bw30 poladd_bw40 poladd_bw50 ///
        enpp enppadd_bw50_log inbw50_log sum_pervote_parl {
    by country: gen `src'_tm1 = `src'[_n-1]
}

* First and second lags of the instruments
local lags treated_closest_party_1 treated_closest_party_2 ///
    treated_closest_party_3 treated_closest_party_4 ///
    perf_closest_party_1 perf_closest_party_1_sq perf_closest_party_1_3 perf_closest_party_1_4 ///
    perf_closest_party_2 perf_closest_party_2_sq perf_closest_party_2_3 perf_closest_party_2_4 ///
    perf_closest_party_3 perf_closest_party_3_sq perf_closest_party_3_3 perf_closest_party_3_4 ///
    perf_closest_party_4 perf_closest_party_4_sq perf_closest_party_4_3 perf_closest_party_4_4 ///
    perf_closest_party_1_norm perf_closest_party_1_norm_sq perf_closest_party_1_norm_3 perf_closest_party_1_norm_4 ///
    perf_closest_party_2_norm perf_closest_party_2_norm_sq perf_closest_party_2_norm_3 perf_closest_party_2_norm_4 ///
    perf_closest_party_3_norm perf_closest_party_3_norm_sq perf_closest_party_3_norm_3 perf_closest_party_3_norm_4 ///
    perf_closest_party_4_norm perf_closest_party_4_norm_sq perf_closest_party_4_norm_3 perf_closest_party_4_norm_4

foreach instr of local lags {
    forvalues back = 1/2 {
        by country: gen `instr'_tm`back' = `instr'[_n-`back']
    }
}

* Added ENPP, number of parliamentary parties and ENPP two elections before
by country: gen enppadd_bw50_tm2 = enppadd_bw50[_n-2]
by country: gen partiesp_tm2 = cnt_parl_parties[_n-2]
by country: gen enpp_tm2 = enpp[_n-2]

** Labelling the election-level variables
*label var cntrydate "Country and date of election"
label var year "Year"
label var eu "EU member or applicant"
label var threshold "Electoral threshold"
label var enep "Effective Number of Electoral Parties"
label var enpp "Effective Number of Parliamentary Parties"
label var cnt_parl_parties "Number of Parliamentary Parties"
label var rile_polarization "Dalton's polarization Index"
label var rile_polarization_parl "Dalton's polarization Index (using parliamentary parties only)"
label var country "Country"
label var cnt_parties "Total number of parties included in CMP"
label var sum_seats "Total number of seats owned by parties included in CMP"
label var poladd_bw20 "Added level of polarization by parties inside the BW up to 20% above electoral threshold"
label var poladd_bw30 "Added level of polarization by parties inside the BW up to 30% above electoral threshold"
label var poladd_bw40 "Added level of polarization by parties inside the BW up to 40% above electoral threshold"
label var poladd_bw50 "Added level of polarization by parties inside the BW up to 50% above electoral threshold"
label var enppadd_bw20 "Added level of ENPP by parties inside the BW up to 20% above electoral threshold"
label var enppadd_bw30 "Added level of ENPP by parties inside the BW up to 30% above electoral threshold"
label var enppadd_bw40 "Added level of ENPP by parties inside the BW up to 40% above electoral threshold"
label var enppadd_bw50 "Added level of ENPP by parties inside the BW up to 50% above electoral threshold"
label var polity2 "Polity 4 score"
label var inbw20 "Number of inside the BW up to 20% above electoral threshold"
label var inbw30 "Number of inside the BW up to 30% above electoral threshold"
label var inbw40 "Number of inside the BW up to 40% above electoral threshold"
label var inbw50 "Number of inside the BW up to 50% above electoral threshold"
label var enep_parl "Effective number of eletoral parties, using only parliamentary parties"
label var inbw20_log "Logged number of inside the BW up to 20% above electoral threshold"
label var inbw30_log "Logged number of inside the BW up to 30% above electoral threshold"
label var inbw40_log "Logged number of inside the BW up to 40% above electoral threshold"
label var inbw50_log "Logged number of inside the BW up to 50% above electoral threshold"
label var enppadd_bw20_log "Logged added level of ENPP by parties inside the BW up to 20% above electoral threshold"
label var enppadd_bw30_log "Logged added level of ENPP by parties inside the BW up to 30% above electoral threshold"
label var enppadd_bw40_log "Logged added level of ENPP by parties inside the BW up to 40% above electoral threshold"
label var enppadd_bw50_log "Logged added level of ENPP by parties inside the BW up to 50% above electoral threshold"
label var poladd_bw20_log "Logged added level of polarization by parties inside the BW up to 20% above electoral threshold"
label var poladd_bw30_log "Logged added level of polarization by parties inside the BW up to 30% above electoral threshold"
label var poladd_bw40_log "Logged added level of polarization by parties inside the BW up to 40% above electoral threshold"
label var poladd_bw50_log "Logged added level of polarization by parties inside the BW up to 50% above electoral threshold"
label var year2 "Year^2"
label var year3 "Year^3"
label var partiest_tm1 "Total number of parties in CMP in the previous election in the country"
label var partiesp_tm1 "Total number of parliamentary parties in CMP in the previous election in the country"
label var enep_tm1 "ENEP in the previous election in the country"
* Fix: this label was attached to enep_tm1 a second time, overwriting the label
* above and leaving enep_parl_tm1 without one
label var enep_parl_tm1 "ENEP (using parliamentary parties only) in the previous election in the country"
label var rile_polarization_tm1 "Level of polarization in the previous election in the country"
label var rile_polarization_parl_tm1 "Level of polarization (using parliamentary parties only) in the previous election in the country"

* Move country so that it sits immediately before year
order country, b(year)

** Generating variable that represents number of months to next election

* Month = last two characters of date (assumes date is coded YYYYMM - TODO confirm)
gen month = substr(string(date),-2,2)
destring month, replace

* Monthly date built from year and month
gen date2 = ym(year, month)

* Months until the following election in the same country
* (missing for the last election of each country)
sort country date
by country: gen distance_nextel = (date2[_n+1] - date2)

** Creating a country-year identifier from the numeric country code and the year
* Both variables are turned into text for the concatenation and back to numbers
* afterwards; year is floored first so that it is a whole number
tostring country, replace
replace year = floor(year)
tostring year, replace
gen cntryyear = country + year
destring year, replace
destring country, replace

* Generating variable with country name
* cntrydate with its last six characters removed
* (presumably the six-digit election date - verify against the data)
generate str country_name = substr(cntrydate, 1, strlen(cntrydate) - 6)

* Generating country-year variable (country name + year), the key used for the
* merges with the external datasets below
tostring year, replace
gen cntryyear2 = country_name + year
destring year, replace

save Data/fragpol_electionlevel_cmp, replace

** Merging with V-Dem data **
use Data/vdem.dta, clear

* Dropping years not in the other dataset
drop if year < 1946

* Generating country-year variable (country name + year), built the same way as
* the key in the election-level file
tostring year, replace
gen cntryyear2 = country_name + year
destring year, replace

* Copies of the key under the names used later to attach values observed
* 5 and 10 years after each election
gen cntryyear_5after = cntryyear2
gen cntryyear_10after = cntryyear2

save Data/vdem_manipulated.dta, replace

* V-Dem (one row per key) is the master; election rows come from the using file
merge 1:m cntryyear2 using Data/fragpol_electionlevel_cmp

* Dropping VDem countries that are not in the other dataset
drop if _merge == 1

destring year, replace

* renaming dependent variables (V-Dem codes -> readable names)
rename v2eltrnout turnout_2
rename v2dlencmps public_goods
rename v2pepwrsoc power_distr_socgroup
rename v2lgfemleg percent_female_mps
rename v2x_freexp_altinf freedom_exp
rename v2x_frassoc_thick freedom_assoc
rename v2xcl_rol equality_before_law
rename v2x_jucon judicial_constrains_exec
rename v2x_partip participatory_dem_ind
rename v2x_cspart civ_soc_particip_index
rename v2xdd_dd direct_popular_vote_index
rename v2eldonate disclosure_camp_donations
rename v2elpubfin public_campaign_finance
rename v2elboycot election_boycotts
rename v2elfrfair election_free_fair
rename v2elaccept loser_accept_election
rename v2pscohesv legislative_party_cohesion
rename v2pscomprg party_comp_across_regions
rename v2psnatpar national_party_control
rename v2exbribe exec_bribery_corruption
rename v2exembez exec_embezzlement_theft
rename v2excrptps public_sector_corruption
rename v2lgqstexp legisl_questions_officials
rename v2lginvstp legislature_investigates
rename v2lgotovst exec_oversight
rename v2lgcrrpt legislature_corruption
rename v2lgdsadlo repr_disadvantaged_groups
rename v2lgqugen lower_chamber_gender_quota
rename v2jupoatck gov_attacks_judiciary
rename v2juhcind high_court_independence
rename v2juhccomp compliance_with_high_court
rename v2jucomp compliance_with_judiciary
rename v2jureview judicial_review
rename v2clacfree free_acad_cultural_expr
rename v2clrelig freedom_religion
rename v2cldmovew free_dom_movmnt_women
rename v2clacjstw access_justice_women
rename v2mecenefm gov_censor_effort_media
rename v2mecenefi gov_censor_effort_internet
rename v2pepwrgen power_distributed_by_gender
rename v2pepwrort power_distributed_sexorient
rename v3elncbpr eff_no_cabinet_parties

save Data/fragpol_merged_vdem, replace

* Creating dataset for V-dem variables 5 years after election
* The V-Dem variables get a _5 suffix here; the five-year offset itself comes from
* the later merge on cntryyear_5after, which the election file builds from year + 5
use Data/vdem_manipulated.dta, clear

rename v2eltrnout turnout_5
rename v2dlencmps public_goods_5
rename v2x_polyarchy v2x_polyarchy_5
rename v2x_libdem v2x_libdem_5
rename v2x_partipdem v2x_partipdem_5
rename v2x_delibdem v2x_delibdem_5
rename v2x_egaldem v2x_egaldem_5
rename v2pepwrsoc power_distr_socgroup_5
rename v2lgfemleg percent_female_mps_5
rename v2x_freexp_altinf freedom_exp_5
rename v2x_frassoc_thick freedom_assoc_5
rename v2xcl_rol equality_before_law_5
rename v2x_jucon judicial_constrains_exec_5
rename v2x_partip participatory_dem_ind_5
rename v2x_cspart civ_soc_particip_index_5
rename v2xdd_dd direct_pop_vote_index_5
rename v2eldonate disclosure_camp_donations_5
rename v2elpubfin public_campaign_finance_5
rename v2elboycot election_boycotts_5
rename v2elfrfair election_free_fair_5
rename v2elaccept loser_accept_election_5
rename v2pscohesv legisl_party_cohesion_5
rename v2pscomprg party_comp_across_regions_5
rename v2psnatpar national_party_control_5
rename v2exbribe exec_bribery_corruption_5
rename v2exembez exec_embezzlement_theft_5
rename v2excrptps public_sector_corruption_5
rename v2lgqstexp legi_questions_officials_5
rename v2lginvstp legislature_investigates_5
rename v2lgotovst exec_oversight_5
rename v2lgcrrpt legislature_corruption_5
rename v2lgdsadlo repr_disadvantaged_groups_5
rename v2lgqugen low_chamber_gender_quota_5
rename v2jupoatck gov_attacks_judiciary_5
rename v2juhcind high_court_independence_5
rename v2juhccomp compliance_w_high_court_5
rename v2jucomp compliance_with_judiciary_5
rename v2jureview judicial_review_5
rename v2clacfree free_acad_cultural_expr_5
rename v2clrelig freedom_religion_5
rename v2cldmovew free_dom_movmnt_women_5
rename v2clacjstw access_justice_women_5
rename v2mecenefm gov_censor_effort_media_5
rename v2mecenefi gov_censor_effort_net_5
rename v2pepwrgen power_distributed_by_gndr_5
rename v2pepwrort pwr_distributd_sexorient_5
rename v3elncbpr eff_no_cabinet_parties_5
rename v2xeg_eqdr equal_distr_resources_5
rename e_Vanhanen_epower_ipo equal_distr_res_index_ord_5
rename v2x_veracc vertical_acc_index_5
rename v2x_diagacc diagonal_acc_index_5
rename v2x_horacc horizontal_acc_index_5
rename v2x_gencl women_civil_lib_index_5
rename v2x_gencs women_civ_soc_partic_index_5
rename v2x_gender women_pol_empowerment_index_5
rename v2x_genpp women_pol_particip_index_5
rename v2x_corr pol_corruption_index_5
rename v2pepwrses power_distr_socioeco_pos_5

save Data/vdem_manipulated_5after.dta, replace

* Creating dataset for V-dem variables 10 years after election
* The V-Dem variables get a _10 suffix here; the ten-year offset itself comes from
* the later merge on cntryyear_10after, which the election file builds from year + 10.
* Note that several names are abbreviated differently from their _5 counterparts.
use Data/vdem_manipulated.dta, clear

rename v2eltrnout turnout_10
rename v2dlencmps public_goods_10
rename v2x_polyarchy v2x_polyarchy_10
rename v2x_libdem v2x_libdem_10
rename v2x_partipdem v2x_partipdem_10
rename v2x_delibdem v2x_delibdem_10
rename v2x_egaldem v2x_egaldem_10
rename v2pepwrsoc power_distr_socgroup_10
rename v2lgfemleg percent_female_mps_10
rename v2x_freexp_altinf freedom_exp_10
rename v2x_frassoc_thick freedom_assoc_10
rename v2xcl_rol equality_before_law_10
rename v2x_jucon judicial_constrains_exec_10
rename v2x_partip participatory_dem_ind_10
rename v2x_cspart civ_soc_particip_index_10
rename v2xdd_dd direct_pop_vote_index_10
rename v2eldonate disclose_camp_donations_10
rename v2elpubfin public_campaign_finance_10
rename v2elboycot election_boycotts_10
rename v2elfrfair election_free_fair_10
rename v2elaccept loser_accept_election_10
rename v2pscohesv legisl_party_cohesion_10
rename v2pscomprg partycomp_across_regions_10
rename v2psnatpar national_party_control_10
rename v2exbribe exec_bribery_corruption_10
rename v2exembez exec_embezzlement_theft_10
rename v2excrptps public_sector_corruption_10
rename v2lgqstexp leg_questions_officials_10
rename v2lginvstp legislature_investigates_10
rename v2lgotovst exec_oversight_10
rename v2lgcrrpt legislature_corruption_10
rename v2lgdsadlo repr_disadv_groups_10
rename v2lgqugen low_chamber_gender_quota_10
rename v2jupoatck gov_attacks_judiciary_10
rename v2juhcind high_court_independence_10
rename v2juhccomp compliance_w_high_court_10
rename v2jucomp compliance_w_judiciary_10
rename v2jureview judicial_review_10
rename v2clacfree free_acad_cultural_expr_10
rename v2clrelig freedom_religion_10
rename v2cldmovew free_dom_movmnt_women_10
rename v2clacjstw access_justice_women_10
rename v2mecenefm gov_censor_effort_media_10
rename v2mecenefi gov_censor_effort_net_10
rename v2pepwrgen power_distributd_by_gndr_10
rename v2pepwrort pwr_distributd_sexorient_10
rename v3elncbpr eff_no_cabinet_parties_10
rename v2xeg_eqdr equal_distr_resources_10
rename e_Vanhanen_epower_ipo eq_distr_res_index_ord_10
rename v2x_veracc vertical_acc_index_10
rename v2x_diagacc diagonal_acc_index_10
rename v2x_horacc horizontal_acc_index_10
rename v2x_gencl women_civil_lib_index_10
rename v2x_gencs women_civ_soc_partic_index_10
rename v2x_gender women_pol_empowerment_index_10
rename v2x_genpp women_pol_particip_index_10
rename v2x_corr pol_corruption_index_10
rename v2pepwrses power_distr_socioeco_pos_10

save Data/vdem_manipulated_10after.dta, replace

* Merging with data from QoG
use Data/qog_std_ts_jan19, clear

* Country-year key built from the QoG country name (cname) and year
tostring year, replace
gen cntryyear2 = cname + year
destring year, replace

* Keep only the key and the four QoG variables used
keep cntryyear2 cses_sd hum_satdem wvs_satfin wvs_satlif

* Copies of the key under the names used later for the 5- and 10-year merges
gen cntryyear_5after = cntryyear2
gen cntryyear_10after = cntryyear2

save Data/qog_manipulated.dta, replace

* NOTE(review): m:m pairs observations by position within each key; this is only
* safe if cntryyear2 is unique in the QoG file - confirm
merge m:m cntryyear2 using Data/fragpol_merged_vdem, gen(merge)

* Discard QoG country-years without an election observation
drop if merge == 1
drop merge

save Data/fragpol_merged_vdem_qog, replace

* Merging with data from World Bank (political variables)
use Data/DPI2012.dta, clear

* Align two country names with the spelling used in the election-level file
replace countryname = "Czech Republic" if countryname == "Czech Rep."
replace countryname = "Germany" if countryname == "FRG/Germany"

* Country-year key: country name followed by the year as text
tostring year, replace
gen cntryyear2 = countryname + year
destring year, replace

* Readable names for the two variables used, with the -999 code set to missing
rename oppmajh lower_house_majority
rename govfrac gov_fract_index
foreach dpivar in lower_house_majority gov_fract_index {
    replace `dpivar' = . if `dpivar' == -999
}
summarize gov_fract_index

keep cntryyear2 lower_house_majority gov_fract_index

* Copies of the key under the names used later for the 5- and 10-year merges
foreach horizon in 5 10 {
    gen cntryyear_`horizon'after = cntryyear2
}

save Data/worldbank_manipulated.dta, replace

merge m:m cntryyear2 using Data/fragpol_merged_vdem_qog, gen(merge)

* Discard country-years without an election observation
drop if merge == 1

save Data/fragpol_merged_vdem_qog_wb.dta, replace

* Creating datasets for QoG variables 5 and 10 years after election:
* same data, with the horizon appended to each variable name
foreach horizon in 5 10 {
    use Data/qog_manipulated.dta, clear

    foreach qogvar in cses_sd hum_satdem wvs_satfin wvs_satlif {
        rename `qogvar' `qogvar'_`horizon'
    }

    save Data/qog_manipulated_`horizon'after.dta, replace
}

* Creating datasets for World Bank variables 5 and 10 years after election:
* only the merge key for the matching horizon is kept
foreach horizon in 5 10 {
    use Data/worldbank_manipulated.dta, clear

    keep cntryyear2 lower_house_majority gov_fract_index cntryyear_`horizon'after

    foreach wbvar in lower_house_majority gov_fract_index {
        rename `wbvar' `wbvar'_`horizon'
    }

    save Data/worldbank_manipulated_`horizon'after.dta, replace
}

* Reload the merged election-level file and give readable names to the remaining
* V-Dem indices (no local macros are created here, despite the original heading)
use Data/fragpol_merged_vdem_qog_wb, clear

rename v2xeg_eqdr equal_distr_resources
rename e_Vanhanen_epower_ipo equal_distr_res_index_ord
rename v2x_veracc vertical_acc_index
rename v2x_diagacc diagonal_acc_index
rename v2x_horacc horizontal_acc_index
rename v2x_gencl women_civil_lib_index
rename v2x_gencs women_civ_soc_partic_index
rename v2x_gender women_pol_empowerment_index
rename v2x_genpp women_pol_particip_index
rename v2x_corr political_corruption_index
rename v2pepwrses power_distr_socioeco_pos

* Adding data for specific number of years after election (5 and 10)

* Target years, converted to text so they can be appended to the country name
foreach horizon in 5 10 {
    gen year`horizon' = year + `horizon'
    tostring year`horizon', replace
}

* Replace the keys carried over from the external files (which equal the
* election's own country-year) with keys pointing 5 and 10 years ahead
drop cntryyear_5after cntryyear_10after
foreach horizon in 5 10 {
    gen cntryyear_`horizon'after = country_name + year`horizon'
}
destring year5 year10, replace

* Attach each source at both horizons, in the order V-Dem, QoG, World Bank,
* World Bank deficit; rows found only in the external file are discarded.
* NOTE(review): the worldbank_deficit files are not created in this part of the
* script - they are presumably produced elsewhere.
drop _merge
foreach source in vdem_manipulated qog_manipulated worldbank_manipulated worldbank_deficit_manipulated {
    foreach horizon in 5 10 {
        merge m:m cntryyear_`horizon'after using Data/`source'_`horizon'after.dta
        drop if _merge == 2
        drop _merge
    }
}

* Generating variable indicating european countries:
* 1 by default, 0 for the countries listed below
* (inlist() accepts at most ten string arguments, hence the two calls)
gen european = 1
replace european = 0 if inlist(country_name, "Armenia", "Mexico", "Bolivia", "Israel", "Georgia", "New Zealand") ///
    | inlist(country_name, "Russia", "Turkey", "Seychelles", "Peru", "Taiwan", "South Korea")


save Data/fragmentation_final, replace

** Keeping only variables that matter to us
use Data/fragmentation_final, clear

* Generating a variable of closeness to next election (so we can do the PCAs)
* Reverses distance_nextel so that larger values mean a nearer election.
* NOTE(review): 63 is presumably the largest gap in months in the data - confirm
gen closeness_nextel = 63 - distance_nextel

* Generating gov frationalization one election before
sort country date
by country: gen gov_fract_index_tm1 = gov_fract_index[_n-1]

* There's two variables for turnout: one from Vdem, one collected by us. I drop ours and stick to the one from VDem
* NOTE(review): this step only renames the V-Dem variable (turnout_2); no other
* turnout variable is dropped in this section
rename turnout_2 turnout
* Keeping only variables that matter

* Per-bandwidth variables for bandwidths of 10 to 100 percent of the threshold:
* inbwX (inside the bandwidth above the threshold), inbwX_below (inside the
* bandwidth below the threshold) and enppadd_bwX
local bw_vars
forvalues b = 10/100 {
    local bw_vars `bw_vars' inbw`b' inbw`b'_below enppadd_bw`b'
}

* Every variable is spelled out in full. The previous list contained
* power_distributed_by_gndr, which only resolved to power_distributed_by_gndr_5
* through variable abbreviation (it breaks under "set varabbrev off" or if a
* second variable with that prefix appears). Duplicate entries were removed;
* the set of variables kept is unchanged.
keep year date country cntrydate european threshold turnout polity2 ///
    enep enep_parl enpp enpp_tm1 enpp_tm2 partiesp_tm1 ///
    cnt_parties cnt_parl_parties cnt_nonparl_parties ///
    sum_seats sum_pervote sum_pervote_parl sum_pervote_parl_tm1 ///
    rile_polarization rile_polarization_parl ///
    poladd_bw20 poladd_bw30 poladd_bw40 poladd_bw50 poladd_bw50_tm1 ///
    inbw50_log inbw50_log_tm1 ///
    enppadd_bw50_log enppadd_bw50_log_tm1 enppadd_bw50_tm1 enppadd_bw50_tm2 ///
    `bw_vars' ///
    treated_closest_party_* perf_closest_party_* ///
    exec_bribery_corruption exec_embezzlement_theft ///
    public_sector_corruption legislature_corruption ///
    percent_female_mps repr_disadvantaged_groups ///
    power_distributed_by_gender power_distributed_sexorient power_distr_socgroup ///
    distance_nextel closeness_nextel ///
    vertical_acc_index diagonal_acc_index horizontal_acc_index ///
    public_goods gov_fract_index gov_fract_index_tm1 ///
    exec_bribery_corruption_5 exec_embezzlement_theft_5 ///
    public_sector_corruption_5 legislature_corruption_5 ///
    percent_female_mps_5 repr_disadvantaged_groups_5 ///
    power_distributed_by_gndr_5 pwr_distributd_sexorient_5 power_distr_socgroup_5 ///
    pol_corruption_index_5 ///
    vertical_acc_index_5 diagonal_acc_index_5 horizontal_acc_index_5 ///
    public_goods_5 ///
    exec_bribery_corruption_10 exec_embezzlement_theft_10 ///
    public_sector_corruption_10 legislature_corruption_10 ///
    percent_female_mps_10 repr_disadv_groups_10 ///
    power_distributd_by_gndr_10 pwr_distributd_sexorient_10 power_distr_socgroup_10 ///
    pol_corruption_index_10 ///
    vertical_acc_index_10 diagonal_acc_index_10 horizontal_acc_index_10 ///
    public_goods_10 ///
    v2x_polyarchy v2x_libdem v2x_partipdem v2x_delibdem v2x_egaldem ///
    v2x_polyarchy_5 v2x_libdem_5 v2x_partipdem_5 v2x_delibdem_5 v2x_egaldem_5 ///
    v2x_polyarchy_10 v2x_libdem_10 v2x_partipdem_10 v2x_delibdem_10 v2x_egaldem_10

* Standardizing dependent variables
* For each outcome x three variables are created: x_avg (sample mean),
* x_sdv (sample standard deviation) and x_stnd = (x - x_avg) / x_sdv

* Outcomes measured at the time of the election
local outcomes turnout ///
    exec_bribery_corruption exec_embezzlement_theft ///
    public_sector_corruption legislature_corruption ///
    percent_female_mps repr_disadvantaged_groups ///
    power_distributed_by_gender power_distributed_sexorient ///
    closeness_nextel ///
    vertical_acc_index diagonal_acc_index public_goods ///
    horizontal_acc_index gov_fract_index power_distr_socgroup ///
    v2x_polyarchy v2x_libdem v2x_partipdem v2x_delibdem v2x_egaldem

foreach y of local outcomes {
    egen `y'_avg = mean(`y')
    egen `y'_sdv = sd(`y')
    generate `y'_stnd = (`y' - `y'_avg) / `y'_sdv
}

* Outcomes measured 5 years after treatment (names are stubs; the _5 suffix is added in the loop)
local outcomes_5 exec_bribery_corruption exec_embezzlement_theft ///
    public_sector_corruption legislature_corruption ///
    percent_female_mps repr_disadvantaged_groups ///
    power_distributed_by_gndr pwr_distributd_sexorient ///
    vertical_acc_index diagonal_acc_index ///
    horizontal_acc_index power_distr_socgroup public_goods ///
    v2x_polyarchy v2x_libdem v2x_partipdem v2x_delibdem v2x_egaldem

foreach y of local outcomes_5 {
    local v `y'_5
    egen `v'_avg = mean(`v')
    egen `v'_sdv = sd(`v')
    generate `v'_stnd = (`v' - `v'_avg) / `v'_sdv
}

* Outcomes measured 10 years after treatment (stubs; the _10 suffix is added in the loop).
* Some stubs are spelled differently from the 5-year ones because the variable names differ.
local outcomes_10 exec_bribery_corruption exec_embezzlement_theft ///
    public_sector_corruption legislature_corruption ///
    percent_female_mps repr_disadv_groups ///
    power_distributd_by_gndr pwr_distributd_sexorient ///
    vertical_acc_index diagonal_acc_index ///
    horizontal_acc_index power_distr_socgroup public_goods ///
    v2x_polyarchy v2x_libdem v2x_partipdem v2x_delibdem v2x_egaldem

foreach y of local outcomes_10 {
    local v `y'_10
    egen `v'_avg = mean(`v')
    egen `v'_sdv = sd(`v')
    generate `v'_stnd = (`v' - `v'_avg) / `v'_sdv
}

* Creating PCA's for each type of outcome
* Pattern throughout: run the PCA, predict the score of the first component,
* and standardize that score into comp_<group>[_5|_10]_stnd

* One for summary measures
pca v2x_polyarchy v2x_libdem v2x_partipdem v2x_delibdem v2x_egaldem
predict pcsummary
egen comp_summary_stnd = std(pcsummary)

* One for corruption outcomes
pca exec_bribery_corruption exec_embezzlement_theft public_sector_corruption ///
    legislature_corruption, comp(1)
predict pc1
egen comp_corruption_stnd = std(pc1)

* One for accountability outcomes
pca vertical_acc_index diagonal_acc_index horizontal_acc_index, comp(1)
predict pc2
egen comp_accountability_stnd = std(pc2)

* One for outcomes on descriptive representation of women
pca percent_female_mps power_distributed_by_gender, comp(1)
predict pc3
egen comp_womenrep_stnd = std(pc3)

* One for outcomes on representation of underprivileged groups
pca power_distributed_sexorient power_distr_socgroup repr_disadvantaged_groups, comp(1)
predict pc6
egen comp_underpriviliged_stnd = std(pc6)

* Creating PCA's for each type of outcome, measured 5 years later

* All 5-year outcomes together
* (fixed: the contemporaneous vertical_acc_index was used here instead of vertical_acc_index_5)
pca exec_bribery_corruption_5 exec_embezzlement_theft_5 public_sector_corruption_5 ///
    legislature_corruption_5 percent_female_mps_5 ///
    repr_disadvantaged_groups_5 ///
    power_distributed_by_gndr_5 pwr_distributd_sexorient_5 ///
    vertical_acc_index_5 diagonal_acc_index_5 ///
    horizontal_acc_index_5 power_distr_socgroup_5 public_goods_5
predict pcall_5
egen comp_all_5_stnd = std(pcall_5)

* Summary measures, 5 years later
* (fixed: the contemporaneous score pcsummary was standardized instead of pcsummary_5)
pca v2x_polyarchy_5 v2x_libdem_5 v2x_partipdem_5 v2x_delibdem_5 v2x_egaldem_5
predict pcsummary_5
egen comp_summary_5_stnd = std(pcsummary_5)

* Corruption, 5 years later
pca exec_bribery_corruption_5 exec_embezzlement_theft_5 public_sector_corruption_5 ///
    legislature_corruption_5, comp(1)
predict pc7
egen comp_corruption_5_stnd = std(pc7)

* Accountability, 5 years later
pca vertical_acc_index_5 diagonal_acc_index_5 horizontal_acc_index_5, comp(1)
predict pc8
egen comp_accountability_5_stnd = std(pc8)

* Descriptive representation of women, 5 years later
pca percent_female_mps_5 power_distributed_by_gndr_5, comp(1)
predict pc9
egen comp_womenrep_5_stnd = std(pc9)

* Representation of underprivileged groups, 5 years later
pca power_distr_socgroup_5 repr_disadvantaged_groups_5 pwr_distributd_sexorient_5, comp(1)
predict pc10
egen comp_underpriviliged_5_stnd = std(pc10)

* Creating PCA's for each type of outcome, measured 10 years later

* Summary measures, 10 years later
* (fixed: the contemporaneous score pcsummary was standardized instead of pcsummary_10)
pca v2x_polyarchy_10 v2x_libdem_10 v2x_partipdem_10 v2x_delibdem_10 v2x_egaldem_10
predict pcsummary_10
egen comp_summary_10_stnd = std(pcsummary_10)

* Corruption, 10 years later
pca exec_bribery_corruption_10 exec_embezzlement_theft_10 public_sector_corruption_10 ///
    legislature_corruption_10, comp(1)
predict pc11
egen comp_corruption_10_stnd = std(pc11)

* Accountability, 10 years later
pca vertical_acc_index_10 diagonal_acc_index_10 horizontal_acc_index_10, comp(1)
predict pc12
egen comp_accountability_10_stnd = std(pc12)

* Descriptive representation of women, 10 years later
pca percent_female_mps_10 power_distributd_by_gndr_10, comp(1)
predict pc13
egen comp_womenrep_10_stnd = std(pc13)

* Representation of underprivileged groups, 10 years later
pca power_distr_socgroup_10 repr_disadv_groups_10 pwr_distributd_sexorient_10, comp(1)
predict pc14
egen comp_underpriviliged_10_stnd = std(pc14)

** Generating lagged variables for the instrument
* enppadd_bw50_tm1 already exists in the data; it is dropped so the loop can recreate it
drop enppadd_bw50_tm1
sort country date

* For every bandwidth (10 to 100 percent of the threshold), take the value from
* the previous observation of the same country (data are sorted by country and date)
forvalues x = 10/100 {
    by country: gen enppadd_bw`x'_tm1 = enppadd_bw`x'[_n-1]
    * Fixed: this lag was previously built from enppadd_bw`x' (copy-paste slip),
    * which made inbw`x'_tm1 an exact copy of enppadd_bw`x'_tm1
    by country: gen inbw`x'_tm1 = inbw`x'[_n-1]
    by country: gen inbw`x'_below_tm1 = inbw`x'_below[_n-1]
}

* Lagged number of non-parliamentary parties
by country: gen cnt_nonparl_parties_tm1 = cnt_nonparl_parties[_n-1]

** Generate variable for West European and East European countries
* Countries are identified by their numeric code in the variable country

* West Europe: 3 Austria, 9 Denmark, 12 Germany, 13 Greece, 15 Iceland,
* 17 Italy, 24 Netherlands, 26 Norway, 35 Sweden
generate west_europe = inlist(country, 3, 9, 12, 13, 15, 17, 24, 26, 35)

* East Europe: 1 Albania, 2 Armenia, 5 Bulgaria, 6 Croatia, 8 Czech Republic,
* 10 Estonia, 11 Georgia, 14 Hungary, 18 Latvia, 19 Lithuania, 20 Macedonia,
* 22 Moldova, 23 Montenegro, 28 Poland, 29 Romania, 30 Russia, 32 Slovakia,
* 33 Slovenia, 38 Ukraine
generate east_europe = inlist(country, 1, 2, 5, 6, 8, 10, 11, 14, 18, 19, ///
    20, 22, 23, 28, 29, 30, 32, 33, 38)

* The existing european indicator is replaced: a country is european if it
* belongs to either of the two groups above
drop european
generate european = (west_europe == 1 | east_europe == 1)

* Detailed summary of left-right (rile) polarization
summarize rile_polarization, detail

* Sample median of that variable
egen rile_polarization_median = median(rile_polarization)

* Variables to split the sample on: medians of the threshold and of enpp_tm2
summarize threshold, detail
egen median_threshold = median(threshold)
egen enpp_tm2_median = median(enpp_tm2)

* Polarization one and two observations earlier within country (ordered by date),
* and the second lag of poladd_bw50
sort country date
forvalues k = 1/2 {
    by country: generate polarization_tm`k' = rile_polarization[_n-`k']
}
by country: generate poladd_bw50_tm2 = poladd_bw50[_n-2]

* Sample medians of the lagged polarization variables
foreach v in polarization_tm1 polarization_tm2 poladd_bw50_tm1 poladd_bw50_tm2 {
    egen `v'_median = median(`v')
}

* Save the election-level dataset (in the Stata 12 file format) and a CSV copy
saveold Data/fragmentation_electionlevel, replace version(12)

export delimited using Data/fragmentation_final.csv, replace

*****************
* Saving a CSV version of the datasets we will need later, with labels of parties in the elections used as examples
*****************

use Data/cmp_beforecollapse, clear

* Party codes are converted to strings so they can be overwritten with names
tostring party, replace force

* Each entry is "<party code> <party name>".
* The first nine entries are the Czech parties, the last five the Bulgarian ones.
foreach pair in ///
    "82220 Communist Party of Bohemia and Moravia" ///
    "82320 Social Democratic Party" ///
    "82413 Civic Dem Party" ///
    "82430 ANO 2011" ///
    "82523 Christian and Democratic Union" ///
    "82530 TOP 09" ///
    "82610 Mayors and Independents" ///
    "82721 Freedom and Direct Democracy" ///
    "82953 Pirate Party" ///
    "80510 Citizens for European Dev of Bulgaria" ///
    "80221 BSP-Left Bulgaria" ///
    "80071 United Patriots" ///
    "80951 Mov for Rights and Freedoms" ///
    "80640 Will" {
    * Split the entry into the code (first word) and the name (the remainder)
    gettoken code name : pair
    local name = trim("`name'")
    replace party = "`name'" if party == "`code'"
}

export delimited using Data/cmp_beforecollapse.csv, replace

** Merging the party-level and election-level datasets, so we can calculate the optimal bandwidths for each outcome (for which we need party-level data)

use Data/fragmentation_partylevel, clear

* NOTE(review): m:m pairs observations by position within cntrydate; if the
* election-level file has one row per cntrydate, m:1 would be the safer form - confirm
merge m:m cntrydate using Data/fragmentation_electionlevel, generate(merge3)

* Both derived variables already exist in the data; drop them and rebuild
drop vote_as_prop_threshold distance_threshold_prop

* Vote share of each party as a proportion of the threshold
generate vote_as_prop_threshold = pervote / threshold

* Distance between each party and the threshold, in proportion of the threshold
* (0 = exactly at the threshold, negative = below it)
generate distance_threshold_prop = vote_as_prop_threshold - 1

* Checking if everything looks ok
summarize distance_threshold_prop

* Saving this dataset (in the Stata 12 file format)
saveold Data/fragmentation_everything.dta, replace version(12)
